********************************************************************************
*** PPI Analysis                                                             ***
*** Author:   Adam Osman     				            					 ***								
*** Last Update: September 2021                                              ***
***                                                                          ***                                                       
*** Files used:                                                              ***                                                       
***   PPI - Balance.dta 			                                     	 ***
***   PPI - Survey.dta														 ***
***	  PPI - Graph.dta														 ***
***	  CSSII Baseline Clean.dta												 ***
***   Application Characteristics.dta                                        ***
********************************************************************************

* Start from an empty Stata session and switch off the -more- pager so the
* do-file runs from top to bottom without pausing.
clear all
set more off

////////////////////////////////////////////////////////////////////////////////
*Set Globals
////////////////////////////////////////////////////////////////////////////////

* Folder layout. Only "home" has to be edited on a new machine; every other
* path is derived from it.
* Forward slashes are used as the separator because Stata accepts them on
* Windows, macOS and Linux alike, whereas a backslash is Windows-only and is
* also treated as an escape character when it directly precedes a macro.
global home "C:/Users/EKP4095/Dropbox/CSS - RA Work/PPI Paper/Publishable" // --- change directory
global raw "$home/Raw"
global data "$home/Data"
global code "$home/Code"
global result "$home/Result"

////////////////////////////////////////////////////////////////////////////////
*Table 1 
////////////////////////////////////////////////////////////////////////////////

*panel A
* Balance checks on PPI - Balance.dta, comparing the two groups of "treatment".
use "$data/PPI - Balance.dta", clear
* Numeric recode of the string bank name so it can enter the regression.
gen bank_n = 1 if bank == "FICO"
replace bank_n = 2 if bank == "FMB"

* Indicator variables pair_2 ... pair_10, one for each value 2-10 of pair.
forval i = 2/10 {
	gen pair_`i' = (pair == `i')
}

* P collects Table 1. Every variable occupies two rows:
*   row i   : group-1 mean | group-1 mean + coefficient | coefficient on treatment
*   row i+1 : N of group 1 | N of group 2               | e(HB)[1,4] from randcmdci
* Rows 1-6 are panel A (three variables), rows 7-8 are panel B.
* NOTE(review): e(HB)[1,4] is presumably the randomization-inference p-value;
* confirm against the randcmdci help file.
matrix P = J(8,3,.)
loc i = 1 
loc j = 1

local balancevar povertyheadcount yearopened numberofaos 
foreach var of local balancevar {
	ttest `var', by(treatment) 
	matrix P[`i',`j'] = r(mu_1)
	matrix P[`i'+1,`j'] = r(N_1)
	matrix P[`i'+1,`j'+1] = r(N_2)
	randcmdci reg `var' treatment bank_n pair_*, treatvars(treatment) testvars(treatment) reps(1000) // results not the same.
	matrix P[`i',`j'+2] = e(b)[1,1]
	* The raw treatment mean will not line up with column 3 (OLS ITT estimate)
	* because of the adjustment from the controls. Column 2 is therefore built
	* as column 1 (control value) plus column 3.
	matrix P[`i',`j'+1] = P[`i',`j'] + e(b)[1,1]
	matrix P[`i'+1,`j'+2] = e(HB)[1,4]
	
	loc i = `i' + 2
}

*panel B
* Same layout for months_in_exp from the application-level data, written to
* rows 7-8. The cells are addressed explicitly rather than through the loop
* counters left over from panel A, so this part no longer depends on how many
* variables panel A happens to contain.
use "$data/Application Characteristics", clear
ttest months_in_exp, by(ppitreatment)
	matrix P[7,1] = r(mu_1)
	matrix P[8,1] = r(N_1)
	matrix P[8,2] = r(N_2)
randcmdci reg months_in_exp ppitreatment bank _P*, treatvars(ppitreatment) testvars(ppitreatment) reps(1000) cluster(branch_id) // results not the same.
	matrix P[7,3] = e(b)[1,1]
	matrix P[7,2] = P[7,1] + e(b)[1,1]
	matrix P[8,3] = e(HB)[1,4]

* Write the assembled matrix to the results folder.
putexcel set "$result/table1.xlsx", replace
putexcel A1 = matrix(P), nformat(#.000)
	
////////////////////////////////////////////////////////////////////////////////
*Table 2
////////////////////////////////////////////////////////////////////////////////

use "$data/CSSII Baseline Clean.dta", clear

* P collects Table 2 (26 rows x 3 columns). Every variable occupies two rows:
*   row i   : group-1 mean | group-1 mean + coefficient | coefficient on ppitreatment
*   row i+1 : s.e. of the group-1 mean | s.e. of the group-2 mean | e(HB)[1,4]
* Rows 7-26 are filled here; rows 1-6 are filled further down from the
* application-level data.
* NOTE(review): e(HB)[1,4] is presumably the randomization-inference p-value;
* confirm against the randcmdci help file.
matrix P = J(26,3,.)
loc i = 7 
loc j = 1

* NOTE(review): a stand-alone randcmdci call on ihs_tassets used to sit here.
* Its e() results were overwritten by the first randcmdci call inside the loop
* before anything read them, so it only cost 1,000 extra permutations and has
* been dropped. If randcmdci does not reseed on every call, dropping it shifts
* the random draws used below - confirm the exported values are unchanged.

* Rows 7-20: seven client-level variables, full sample.
foreach var in pov_lk ppiscore ihs_income ihs_tassets biz_num homeown std_index1  {
	ttest `var', by(ppitreatment)
	matrix P[`i',`j'] = r(mu_1)
	matrix P[`i'+1,`j'] = r(sd_1)/sqrt(r(N_1))
	matrix P[`i'+1,`j'+1] = r(sd_2)/sqrt(r(N_2))
	randcmdci reg `var' ppitreatment bank _P*, treatvars(ppitreatment) testvars(ppitreatment) reps(1000) cluster(branch_id) // randomization inference with 1000 permutations per Young (2019)
	matrix P[`i',`j'+2] = e(b)[1,1]
	matrix P[`i',`j'+1] = P[`i',`j'] + e(b)[1,1]
	matrix P[`i'+1,`j'+2] = e(HB)[1,4]
	
	loc i = `i' + 2
}

loc i = 21
loc j = 1
*panel D
* Rows 21-22: gotloan, restricted to observations with loan_decision == 1.
ttest gotloan if loan_decision == 1, by(ppitreatment)
	matrix P[`i',`j'] = r(mu_1)
	matrix P[`i'+1,`j'] = r(sd_1)/sqrt(r(N_1))
	matrix P[`i'+1,`j'+1] = r(sd_2)/sqrt(r(N_2))
randcmdci reg gotloan ppitreatment bank _P* if loan_decision == 1, treatvars(ppitreatment) testvars(ppitreatment) reps(1000) cluster(branch_id)
	matrix P[`i',`j'+2] = e(b)[1,1]
	matrix P[`i',`j'+1] = P[`i',`j'] + e(b)[1,1]
	matrix P[`i'+1,`j'+2] = e(HB)[1,4]

loc i = 23

* Rows 23-26: loansize and default_t3b, restricted to gotloan == 1.
foreach var in loansize default_t3b {
	ttest `var' if gotloan == 1, by(ppitreatment)
	matrix P[`i',`j'] = r(mu_1)
	matrix P[`i'+1,`j'] = r(sd_1)/sqrt(r(N_1))
	matrix P[`i'+1,`j'+1] = r(sd_2)/sqrt(r(N_2))
	randcmdci reg `var' ppitreatment bank _P* if gotloan == 1, treatvars(ppitreatment) testvars(ppitreatment) reps(1000) cluster(branch_id) // randomization inference with 1000 permutations per Young (2019)
	matrix P[`i',`j'+2] = e(b)[1,1]
	matrix P[`i',`j'+1] = P[`i',`j'] + e(b)[1,1]
	matrix P[`i'+1,`j'+2] = e(HB)[1,4]
	
	loc i = `i' + 2
}


* N stacks the e(b) row returned by estpost tab for the three samples used
* above: everyone, loan_decision == 1, gotloan == 1.
estpost tab ppitreatment 
matrix N = e(b)
estpost tab ppitreatment if loan_decision == 1
matrix N = N\e(b)
estpost tab ppitreatment if gotloan == 1
matrix N = N\e(b)
 
*panel A
* Rows 1-6 of Table 2: three variables from the application-level data,
* written into the matrix P that already holds rows 7-26.
* (Two indicators, FICO = bank == 2 and FMB = bank == 1, used to be generated
* here; nothing read them and the dataset is not saved, so they were removed.)
use "$data/Application Characteristics", clear
loc i = 1
loc j = 1

foreach var in numapps numinpov percentinpov {
	ttest `var', by(ppitreatment)
	matrix P[`i',`j'] = r(mu_1)
	matrix P[`i'+1,`j'] = r(sd_1)/sqrt(r(N_1))
	matrix P[`i'+1,`j'+1] = r(sd_2)/sqrt(r(N_2))
	randcmdci reg `var' ppitreatment bank _P*, treatvars(ppitreatment) testvars(ppitreatment) reps(1000) cluster(branch_id) // randomization inference with 1000 permutations per Young (2019)
	matrix P[`i',`j'+2] = e(b)[1,1]
	matrix P[`i',`j'+1] = P[`i',`j'] + e(b)[1,1]
	matrix P[`i'+1,`j'+2] = e(HB)[1,4]
	
	loc i = `i' + 2
}

* Put the e(b) row for this dataset on top of the rows already stored in N,
* so N follows the same top-to-bottom order as the panels in P.
estpost tab ppitreatment 
matrix N = e(b)\N

* P goes to columns A-C, N is written next to it starting in column E.
putexcel set "$result/table2.xlsx", replace
putexcel A1 = matrix(P), nformat(#.000)
putexcel E1 = matrix(N)

////////////////////////////////////////////////////////////////////////////////
*Table 3
////////////////////////////////////////////////////////////////////////////////

use "$data/PPI - Survey.dta", clear

*Most important reason
* Collapse ppisurveyreasonchoose into three groups:
*   coded answer 3      -> group 2
*   coded answer 1 or 2 -> group 1
*   free-text answer containing "respected" or "first choice" -> group 3
* Later replace statements override earlier ones, so a free-text match wins
* over the coded answer, and whatever is still unassigned ends up in group 1.
* The free text is matched on its lower-cased form so that capitalised answers
* are caught as well, in line with how ppisurveylikemost_other is handled
* further down. NOTE(review): this can move observations into group 3 if the
* raw text contains capitalised matches; check the exported shares.
gen reasonchoose = 2 if ppisurveyreasonchoose == 3
replace reasonchoose = 1 if inlist(ppisurveyreasonchoose,1,2)
replace reasonchoose = 3 if strpos(lower(ppisurveyreasonchoose_other), "respected") > 0 
replace reasonchoose = 3 if strpos(lower(ppisurveyreasonchoose_other), "first choice") > 0 
replace reasonchoose = 1 if missing(reasonchoose)

* Shares of each group: pooled, then split by ppitreatment.
table reasonchoose, stat(proportion, total)
collect export  "$result/table_3_1(1).xlsx", replace
table reasonchoose ppitreatment, stat(proportion, across(reasonchoose))
collect export  "$result/table_3_1(2).xlsx", replace

*Name 3 things
* Denominators. All three are restricted to respondents with a non-missing
* first answer, so the pooled, control and treatment shares are computed on
* the same basis (previously only the pooled denominator had this restriction,
* which understated the by-arm shares whenever some respondents had no answer).
count if ~missing(ppisurveylikemost1)
local total = r(N)
count if ppitreatment == 0 & ~missing(ppisurveylikemost1)
local control = r(N)
count if ppitreatment == 1 & ~missing(ppisurveylikemost1)
local treatment = r(N)

* P: rows 1-7 are the coded answers 1-7 (see the code list after the export),
* rows 8-10 are themes picked out of the free-text answer.
* Columns: all respondents, ppitreatment == 0, ppitreatment == 1.
matrix P = J(10,3,.)

* Rows 1-7: share naming code i in any of the three answer slots.
qui forval i = 1/7 {
	local named (ppisurveylikemost1 == `i' | ppisurveylikemost2 == `i' | ppisurveylikemost3 == `i')
	count if `named'
	matrix P[`i',1] = r(N) / `total'
	count if `named' & ppitreatment == 0
	matrix P[`i',2] = r(N) / `control'
	count if `named' & ppitreatment == 1
	matrix P[`i',3] = r(N) / `treatment'
}

* Free-text answers are lower-cased once so the keyword search below is
* case-insensitive.
replace ppisurveylikemost_other = lower(ppisurveylikemost_other)

* Each theme is defined exactly once as a keyword condition; every strpos()
* term is compared with > 0 explicitly.

*Building Human Capital
local theme8 strpos(ppisurveylikemost_other, "learning") > 0 ///
 | strpos(ppisurveylikemost_other, "knowledge") > 0 ///
 | strpos(ppisurveylikemost_other, "improve") > 0 ///
 | strpos(ppisurveylikemost_other, "work experience") > 0

*Convenience of job
local theme9 strpos(ppisurveylikemost_other, "convenient") > 0 ///
 | strpos(ppisurveylikemost_other, "light") > 0 ///
 | strpos(ppisurveylikemost_other, "accessible") > 0 ///
 | strpos(ppisurveylikemost_other, "easy") > 0 ///
 | strpos(ppisurveylikemost_other, "good policy") > 0

*Enjoy their co-workers
local theme10 strpos(ppisurveylikemost_other, "relationship") > 0 ///
 | strpos(ppisurveylikemost_other, "friendly") > 0 ///
 | strpos(ppisurveylikemost_other, "people") > 0

* Rows 8-10: share whose free-text answer matches the theme.
forval r = 8/10 {
	count if (`theme`r'')
	matrix P[`r',1] = r(N) / `total'
	count if (`theme`r'') & ppitreatment == 0
	matrix P[`r',2] = r(N) / `control'
	count if (`theme`r'') & ppitreatment == 1
	matrix P[`r',3] = r(N) / `treatment'
}
	
putexcel set "$result/table_3_2.xlsx", replace
putexcel A1 = matrix(P), nformat(#.000)

/*******************
1 -- good salary
2 -- reasonable working hours
3 -- accommodating superiors
4 -- I can really help
5 -- significant chances for promotion
6 -- find a better job
7 -- job security
*******************/

*Bank or NGO
* Shares of each ppisurveybanksocdev answer, first pooled ...
table ppisurveybanksocdev, stat(proportion, total)
collect export  "$result/table_3_3(1).xlsx", replace
* ... then within each ppitreatment group.
table ppisurveybanksocdev ppitreatment, stat(proportion, across(ppisurveybanksocdev))
collect export  "$result/table_3_3(2).xlsx", replace

* Tabulation of ppitreatment (left in e(b)) and the total number of
* observations, held in a local as soon as it has been counted.
estpost tab ppitreatment 
count
local nobs = r(N)

* Append both to the by-treatment workbook, starting in row 10.
putexcel set "$result/table_3_3(2).xlsx", modify
putexcel A10 = matrix(e(b)) 
putexcel C10 = `nobs'


////////////////////////////////////////////////////////////////////////////////
*Table 4
////////////////////////////////////////////////////////////////////////////////

estimates clear
use "$data/PPI - Survey.dta", clear
* Constant regressor used in the item-by-item regressions below.
gen dummy = 1

* The ten survey items, each asked once with suffix "bank" and once with
* suffix "fam". Full variable names are spelled out (the list previously
* contained the truncated name ppisurveytoiletfa, which only resolved through
* variable abbreviation and breaks under -set varabbrev off-).
local ppivarlist ///
ppisurveychildbank04 ppisurveychildfam04 ///
ppisurveywallmembank ppisurveywallmemfam ///
ppisurveyroofbank ppisurveyrooffam ///
ppisurveyschoolbank614 ppisurveyschoolfam614 ///
ppisurveyfemheadbank ppisurveyfemheadfam ///
ppisurveyother1membank ppisurveyother1memfam ///
ppisurveytoiletbank ppisurveytoiletfam ///
ppisurveyrefrigbank ppisurveyrefrigfam ///
ppisurveytvbank ppisurveytvfam ///
ppisurveywashmachinebank ppisurveywashmachinefam 

* Recode every item (3 becomes 0, 2 becomes -1, other values are left as they
* are), regress it on the constant and store the result as reg_1 ... reg_20.
* Only _cons is exported further down.
loc i = 1 
foreach var of local ppivarlist {
	replace `var' = 0 if `var' == 3
	replace `var' = -1 if `var' == 2
	reg `var' dummy, robust
	estimates store reg_`i'
	loc i = `i' + 1
}

* Index over the ten items: each item enters with the sign shown and the sum
* is scaled by 0.1.
g bank_i= .1*(ppisurveychildbank04 -ppisurveyschoolbank614 -ppisurveyfemheadbank -ppisurveyother1membank+ppisurveywallmembank +ppisurveyroofbank- ppisurveytoiletbank- ppisurveyrefrigbank- ppisurveytvbank-ppisurveywashmachinebank)
g fam_i= .1*(ppisurveychildfam04 -ppisurveyschoolfam614 -ppisurveyfemheadfam -ppisurveyother1memfam+ppisurveywallmemfam +ppisurveyrooffam- ppisurveytoiletfam- ppisurveyrefrigfam- ppisurveytvfam-ppisurveywashmachinefam)

* Mean of each index with standard errors clustered by brid. The p-value of
* the constant is attached to the active estimates before they are stored, so
* the stored copy is guaranteed to carry it. The names reg_22 / reg_23 are
* kept as they were (reg_21 is not used).
reg bank_i, cluster(brid)
estadd scalar pval1 = r(table)["pvalue","_cons"]
estimates store reg_22 
reg fam_i, cluster(brid)
estadd scalar pval2 = r(table)["pvalue","_cons"]
estimates store reg_23

esttab reg_* using "$result/table_4.csv", keep(_cons) constant cells(b(star fmt(%9.6f)) se(fmt(%9.6f))) scalars("pval1" "pval2") starlevel(* .1 ** .05 *** .01) stardetach style(tab) varwidth(8) modelwidth(8) plain replace
estimates clear

////////////////////////////////////////////////////////////////////////////////
*Appendix Table 1 
////////////////////////////////////////////////////////////////////////////////

use "$data/CSSII Baseline Clean.dta", clear

* P collects Appendix Table 1: ten variables (c_1 through c_10 in dataset
* order), two rows each:
*   row i   : group-1 mean | group-1 mean + coefficient | coefficient on ppitreatment
*   row i+1 : s.e. of the group-1 mean | s.e. of the group-2 mean | e(HB)[1,4]
* NOTE(review): e(HB)[1,4] is presumably the randomization-inference p-value;
* confirm against the randcmdci help file.
matrix P = J(20,3,.)
loc i = 1
loc j = 1

foreach var of varlist c_1-c_10  {
	ttest `var', by(ppitreatment)
	matrix P[`i',`j'] = r(mu_1)
	matrix P[`i'+1,`j'] = r(sd_1)/sqrt(r(N_1))
	matrix P[`i'+1,`j'+1] = r(sd_2)/sqrt(r(N_2))
	randcmdci reg `var' ppitreatment bank _P*, treatvars(ppitreatment) testvars(ppitreatment) reps(1000) cluster(branch_id) // randomization inference with 1000 permutations per Young (2019)
	matrix P[`i',`j'+2] = e(b)[1,1]
	matrix P[`i',`j'+1] = P[`i',`j'] + e(b)[1,1]
	matrix P[`i'+1,`j'+2] = e(HB)[1,4]
	
	loc i = `i' + 2
}

* Tabulation of ppitreatment; its e(b) row is written next to P, from column E.
estpost tab ppitreatment

putexcel set "$result/appendix_table1.xlsx", replace
putexcel A1 = matrix(P), nformat(#.000)
putexcel E1 = matrix(e(b))

////////////////////////////////////////////////////////////////////////////////
*Appendix Table 2
////////////////////////////////////////////////////////////////////////////////

use "$data/CSSII Baseline Clean.dta", clear
* Restrict the whole section to observations with loan_decision == 1.
keep if loan_decision == 1

* P collects Appendix Table 2, two rows per variable:
*   row i   : group-1 mean | group-1 mean + coefficient | coefficient on ppitreatment
*   row i+1 : s.e. of the group-1 mean | s.e. of the group-2 mean | e(HB)[1,4]
* Rows 7-20 are filled here; rows 1-6 are filled further down from the
* application-level data.
* NOTE(review): e(HB)[1,4] is presumably the randomization-inference p-value;
* confirm against the randcmdci help file.
matrix P = J(20,3,.)
loc i = 7 
loc j = 1

foreach var in pov_lk ppiscore ihs_income ihs_tassets biz_num homeown std_index1  {
	ttest `var', by(ppitreatment)
	matrix P[`i',`j'] = r(mu_1)
	matrix P[`i'+1,`j'] = r(sd_1)/sqrt(r(N_1))
	matrix P[`i'+1,`j'+1] = r(sd_2)/sqrt(r(N_2))
	randcmdci reg `var' ppitreatment bank _P*, treatvars(ppitreatment) testvars(ppitreatment) reps(1000) cluster(branch_id) // randomization inference with 1000 permutations per Young (2019)
	matrix P[`i',`j'+2] = e(b)[1,1]
	matrix P[`i',`j'+1] = P[`i',`j'] + e(b)[1,1]
	matrix P[`i'+1,`j'+2] = e(HB)[1,4]
	
	loc i = `i' + 2
}

* Keep the e(b) row of the ppitreatment tabulation for this sample in N.
estpost tab ppitreatment 
matrix N = e(b)

*panel A
* Rows 1-6 of Appendix Table 2: three variables from the application-level
* data, written into the matrix P that already holds rows 7-20.
use "$data/Application Characteristics", clear

loc i = 1
loc j = 1

foreach var in numclients1 numclients_inpov percentcl_inpov {
	ttest `var', by(ppitreatment)
	matrix P[`i',`j'] = r(mu_1)
	matrix P[`i'+1,`j'] = r(sd_1)/sqrt(r(N_1))
	matrix P[`i'+1,`j'+1] = r(sd_2)/sqrt(r(N_2))
	randcmdci reg `var' ppitreatment bank _P* , treatvars(ppitreatment) testvars(ppitreatment) reps(1000) cluster(branch_id) // randomization inference with 1000 permutations per Young (2019)
	matrix P[`i',`j'+2] = e(b)[1,1]
	matrix P[`i',`j'+1] = P[`i',`j'] + e(b)[1,1]
	matrix P[`i'+1,`j'+2] = e(HB)[1,4]
	
	loc i = `i' + 2
}

* Put the e(b) row for this dataset on top of the row already stored in N,
* so N follows the same top-to-bottom order as the panels in P.
estpost tab ppitreatment 
matrix N = e(b)\N

* P goes to columns A-C, N is written next to it starting in column E.
putexcel set "$result/appendix_table2.xlsx", replace
putexcel A1 = matrix(P), nformat(#.000)
putexcel E1 = matrix(N)

////////////////////////////////////////////////////////////////////////////////
*Appendix Table 3
////////////////////////////////////////////////////////////////////////////////

use "$data/CSSII Baseline Clean.dta", clear

* Four OLS regressions of default, all restricted to gotloan == 1 and with
* standard errors clustered by branch_id:
*   1-2: on ppiscore, without and then with cs_FMB and cs_FICO
*   3-4: on pov_lk,   without and then with cs_FMB and cs_FICO
* FICO enters every specification. eststo stores the models in this order.
eststo: reg default ppiscore FICO if gotloan==1, cl(branch_id)
eststo: reg default ppiscore cs_FMB cs_FICO FICO if gotloan==1, cl(branch_id)
eststo: reg default pov_lk FICO if gotloan==1, cl(branch_id)
eststo: reg default pov_lk cs_FMB cs_FICO FICO if gotloan==1, cl(branch_id)

* Export every stored model (all coefficients, including the constant), then
* drop the stored estimates.
esttab * using "$result/appendix_table3.csv", keep(*) constant cells(b(star fmt(%9.6f)) se(fmt(%9.6f)))  starlevel(* .1 ** .05 *** .01) stardetach style(tab) varwidth(8) modelwidth(8) plain replace
estimates clear

////////////////////////////////////////////////////////////////////////////////
*Figure 1 
////////////////////////////////////////////////////////////////////////////////

use "$data/PPI - Graph.dta", clear
* The line plot connects points in data order, so sort on the x variable first.
sort ppiscore

* Four overlaid plots:
*   1 line of pov_lk against ppiscore                (left axis)
*   2 kernel density of ppiscore, ppitreatment == 0  (right axis)
*   3 kernel density of ppiscore, ppitreatment == 1  (right axis)
*   4 kernel density of ppi weighted by fweight      (right axis)
* The legend relabels them in the order 2, 3, 1, 4.
* NOTE(review): "ppi" in plot 4 is either a separate variable or an
* abbreviation; confirm which variable it resolves to in this dataset.
twoway  (line pov_lk ppiscore, lc(black) yaxis(1) lp(shortdash)) ///
 (kdensity ppiscore if ppitreatment==0, range(0 100) yaxis(2) bw(3) lc(gs8) lp(longdash)) ///
 (kdensity ppiscore if ppitreatment==1, range(0 100) yaxis(2) ytitle("Density", axis(2)) ytitle("", axis(1)) bw(3) lc(black)) ///
 (kdensity ppi [fw=fweight], yaxis(2) bw(3) lc(gs12) lp(shortdash)) , ///
 xtitle("PPI Score") ///
 legend(order(2 "Control PPI Score" 3 "Treatment PPI Score" 1 "Poverty Likelihood" 4 "National PPI Score") region(lstyle(none)) position(1) bplacement(nwest)) ///
 graphregion(fcolor(white)) plotregion(fcolor(white)) ///
 ylabel(, axis(1) angle(0)) ylabel(, axis(2) angle(0))

* Export straight after drawing, while this graph is the one in memory.
graph export "$result/figure1.png", replace
 
**KS test**
* Two-sample Kolmogorov-Smirnov test of ppiscore across ppitreatment groups;
* the result is only shown in the log.
ksmirnov ppiscore, by(ppitreatment)
